clear all
set more off , perm
capture log close


*********************************************************************************
******** This program loads french gas prices and number of gas stations ********
*********************************************************************************

		
*****************************
**** French Postal codes ****
*****************************	
		
// Load and clean data	
	*Load ZIP-codes France
	import excel "$raw\04_France_prices\France_ZIP codes.xlsx" , cellrange(A1:I20414) firstrow clear
	
	*Only keep postalcode, State
	keep PostalCode State
	
	*Rename PostalCode cp_pdv 
	rename PostalCode cp_pdv
	rename State state
	
	*Drop Duplicates
	sort cp_pdv
	quietly by cp_pdv: gen dup = cond(_N==1,0,_n)
	drop if dup>1
	drop dup
	
// Save data
	cd "$dta"
	save postalcode_france.dta , replace
	
***********************
**** French Taxes *****
***********************
	
	// Load and clean data: petrol (E5)	& diesel 
	*Load Tax France
	foreach v in petrol diesel {
	import excel "$raw\04_France_prices\Frankreich_Steuern und Abgaben_`v'.xlsx" , cellrange(A2:F24) firstrow clear
	
	*Drop
	drop F
	
	*Rename
	rename TICPE TICPE2012
	rename C TICPE20131
	rename D TICPE20132
	rename E TICPE2014
	
	*Label
	label variable TICPE2012 "TICPE from 01.01.12-31.12.12"
	label variable TICPE20131 "TICPE from 01.01.2013-10.01.2013"
	label variable TICPE20132 "TICPE from 11.01.2013-31.12.2013"
	label variable TICPE2014 "TICPE from 01.01.2014-31.12.2014"

	*Reshape 
	reshape long TICPE@, i(state) j(period)
	
	*Save
	cd "$dta"
	save TICPE_france_`v'.dta , replace
	}
	
***********************
**** French Prices ****
***********************

// Load, Append, Save
	*Load
	cd "$raw\04_France_prices"
	use PrixCarburants_annuel_2013.dta , clear
	
	*Append 2014
	append using PrixCarburants_annuel_2014.dta

// Clean data
	*Drop all prices but SP95 & gazole 
	keep if (id_prix==2 & nom=="SP95") |(id_prix==1 & nom=="Gazole")
	
	* Drop stations on the motorway
	drop if pop == "A"
	drop pop
	
	*Split variable date in to date and time (to later merge with TICPE-rates)
	gen year = substr(date,1,4)
	gen month = substr(date,6,2)
	gen day = substr(date,9,2)
	destring year, replace
	destring month, replace
	destring day, replace
	gen date2 =mdy(month,day,year)
	gen time = clock(substr(date, 12, 8), "hms")
	format time %tcHH:MM:SS
	format date2 %td
	drop month day year date
	rename date2 date
	
	*Save file
	cd "$dta"
	save france_price.dta , replace	
		
	use france_price.dta,clear
	* Save SP95
	preserve
	keep if (id_prix==2 & nom=="SP95")
	rename valeur e5
	drop nom id_prix
	
	save france_price_sp95.dta , replace	
	restore
	
	* Save Gazole
	preserve
	keep if (id_prix==1 & nom=="Gazole")
	rename valeur gazole
	drop nom id_prix
	save france_price_gazole.dta , replace	
	restore

****************************
**** Create Price Panel ****
****************************

// Create panel basis with one obs per station per date
	* Prepare dates
	import excel "$raw\04_France_prices\date.xlsx" , first clear
	drop if date == .
	drop if year(date)==2012 
	gen pepe = 1
	cd "$dta"
	save dates.dta , replace
	
	* Prepare station ids
	use france_price.dta , clear
	keep id_pdv cp_pdv lat lon
	
	* Clean data (reversed lat & lon; incorrect postal code)
	replace lat = 4794533 if id_pdv == 68250001
	replace lon = 728939.31 if id_pdv == 68250001
	replace cp_pdv = 71000 if id_pdv ==71000005 & cp_pdv == 97213
	
	duplicates drop
	
	* Save station characteristics dta
	save station_charac.dta , replace
	
	
	drop cp_pdv lat lon
	* Assert that uniqueness remains
	bysort id_pdv: egen pepe = count(id_pdv)
	assert pepe == 1
	drop pepe
	
	* Range join
	gen pepe_min = 0
	gen pepe_max = 2
	rangejoin pepe pepe_min pepe_max using dates.dta
	
	format date %td

	* Drop auxilliary variables
	drop pepe_min pepe_max pepe
	
	* Save panel basis and erase auxilliary dtas
	save panel_basis.dta , replace
	erase dates.dta
	
// Open sp95 data set
	cd "$dta"
	use france_price_sp95.dta , clear
	
// Deal with duplicates
	*Drop duplicates (notifications of the same price during the same second at the same station)
		duplicates drop id_pdv date time e5 , force

	* Drop station-level characteristics (saved in other data set)
	drop cp_pdv lat lon
		
	* Check whether there remain duplicates with different prices
		duplicates report id_pdv date time									// 39 duplicates remain --> drop
		duplicates drop id_pdv date time , force
	
	
//	Generate prices at different times of day
	foreach i in e5 {
	forval j=5(1)22 {
		bysort id_pdv date (time): gen `i'_`j'oclock_help1 = cond(time[_n] < `j'*60*60*1000, `i'[_n], .)
		bysort id_pdv date (time): replace `i'_`j'oclock_help1 = cond(id_pdv[_n] == id_pdv[_n+1] & time [_n+1] < `j'*60*60*1000,., `i'_`j'oclock_help1[_n])
		bysort id_pdv date: egen `i'_`j'oclock = sum(`i'_`j'oclock_help1)
		bysort id_pdv (date time): gen `i'_`j'oclock_help2 = cond(`i'_`j'oclock[_n] == 0 & id_pdv[_n] == id_pdv[_n-1] & date[_n] != date[_n-1] ,`i'[_n-1], .)
		bysort id_pdv date: replace `i'_`j'oclock = sum(`i'_`j'oclock_help2) if `i'_`j'oclock == 0
		replace `i'_`j'oclock = . if `i'_`j'oclock == 0 // There are missings if at the opening day there is a price notification after `j'oclock	
		drop `i'_`j'oclock_help*
		}
		}
		
// Merge with panel basis
	merge m:1 id_pdv date using panel_basis.dta
	drop _merge		
	
// Intrapolation rule: If there is no notification on a particular day, then the price of the day before is still correct. (Valid, since we observe population of price changes.)
	bysort id_pdv (date time): replace e5 = e5[_n-1] if e5 == .
	forval j=5(1)22 {
		replace e5_`j'oclock = e5 if time == .
		}

// Create unique observation per station and date
	drop time e5
	duplicates drop
	
	* Assert uniqueness
	bysort id_pdv date: egen pepe = count(id_pdv)
	assert pepe == 1
	drop pepe				
		
// Save intermediate data
	cd "$dta"
	save france_price_sp95_pepe.dta,replace

	
	
// Open gazole data set
	cd "$dta"
	use france_price_gazole.dta , clear
	
// Deal with duplicates
	*Drop duplicates (notifications of the same price during the same second at the same station)
		duplicates drop id_pdv date time gazole , force

	* Drop station-level characteristics (saved in other data set)
	drop cp_pdv lat lon
		
	* Check whether there remain duplicates with different prices
		duplicates report id_pdv date time									// 53 duplicates remain --> drop
		duplicates drop id_pdv date time , force
	
	
//	Generate prices at different times of day
	foreach i in gazole {
	forval j=5(1)22 {
		bysort id_pdv date (time): gen `i'_`j'oclock_help1 = cond(time[_n] < `j'*60*60*1000, `i'[_n], .)
		bysort id_pdv date (time): replace `i'_`j'oclock_help1 = cond(id_pdv[_n] == id_pdv[_n+1] & time [_n+1] < `j'*60*60*1000,., `i'_`j'oclock_help1[_n])
		bysort id_pdv date: egen `i'_`j'oclock = sum(`i'_`j'oclock_help1)
		bysort id_pdv (date time): gen `i'_`j'oclock_help2 = cond(`i'_`j'oclock[_n] == 0 & id_pdv[_n] == id_pdv[_n-1] & date[_n] != date[_n-1] ,`i'[_n-1], .)
		bysort id_pdv date: replace `i'_`j'oclock = sum(`i'_`j'oclock_help2) if `i'_`j'oclock == 0
		replace `i'_`j'oclock = . if `i'_`j'oclock == 0 // There are missings if at the opening day there is a price notification after `j'oclock	
		drop `i'_`j'oclock_help*
		}
		}
		
// Merge with panel basis
	merge m:1 id_pdv date using panel_basis.dta
	drop _merge


// Intrapolation rule: If there is no notification on a particular day, then the price of the day before is still correct. (Valid, since we observe population of price changes.)
	bysort id_pdv (date time): replace gazole = gazole[_n-1] if gazole == .
	forval j=5(1)22 {
		replace gazole_`j'oclock = gazole if time == .
		}

// Create unique observation per station and date
	drop time gazole
	duplicates drop
	
	* Assert uniqueness
	bysort id_pdv date: egen pepe = count(id_pdv)
	assert pepe == 1
	drop pepe		

	
// Save intermediate data
	cd "$dta"
	save france_price_gazole_pepe.dta,replace
	

// Add prices to panel
	use panel_basis.dta, clear
	
// Merge prices to panel basis
	merge 1:m id_pdv date using france_price_sp95_pepe.dta
	drop _merge
	
	merge 1:m id_pdv date using france_price_gazole_pepe.dta
	drop _merge

// Save intermediate panel
	save interm_panel.dta , replace
	erase panel_basis.dta
	
	
// Assign region to postal code
	*Merge station characteristics with regions
	use station_charac.dta , clear

	* Fix postal codes via manual search
	replace cp_pdv = 20140 if cp_pdv == 20156
	replace cp_pdv = 4200 if cp_pdv == 4204
	replace cp_pdv = 34470 if cp_pdv == 34475
	replace cp_pdv = 76400 if cp_pdv == 76371
	replace cp_pdv = 89340 if cp_pdv == 89720
	
	merge m:1 cp_pdv using postalcode_france.dta 
	drop if _merge==2
	drop _merge

// Merge station characteristics with price panel
	merge 1:m id_pdv using interm_panel.dta
	assert _merge == 3
	drop _merge
	erase station_charac.dta
	erase interm_panel.dta
	
// Merge TICPE rates
	* Gen Variable period to merge over
	gen period=year(date)
	replace period=20131 if date<=date("20130110","YMD") & date>=date("20130101","YMD")
	replace period=20132 if date>=date("20130111","YMD") & date<=date("20131231","YMD")
	
	*Merge TICPE on petrol
	merge m:1 state period using TICPE_france_petrol.dta 
	keep if _merge == 3
	drop _merge

	*Destring TICPE
	destring TICPE, replace
	replace TICPE = TICPE / 100
	rename TICPE TICPE_petrol
	
	*Merge TICPE on diesel
	merge m:1 state period using TICPE_france_diesel.dta 
	keep if _merge == 3
	drop _merge

	*Destring TICPE
	destring TICPE, replace
	replace TICPE = TICPE / 100
	rename TICPE TICPE_diesel
	
	* Convert price in Euro
	forval j=5(1)22 {
		replace e5_`j'oclock = e5_`j'oclock / 1000
		replace gazole_`j'oclock = gazole_`j'oclock / 1000
		}
	
// Only keep weekdays
	gen dow = dow(date)
	drop if dow == 0 | dow == 6
	
// Label Variables
	label variable id_pdv "id of petrol-station"
	label variable cp_pdv "Postal code of petrol-station"
	label variable lat "Latitude"
	label variable lon "Longitude"
	label variable date "Date"
	label variable state "State"
	label variable dow "Day of the week"
	forval j=5(1)22 {
		label variable e5_`j'oclock "Gross price petrol `j' o'clock in Euro per litre"
		label variable gazole_`j'oclock "Gross price gazole `j' o'clock in Euro per litre"
		
		}
	label variable TICPE_petrol "TICPE in Euro per litre, petrol"
	label variable TICPE_diesel "TICPE in Euro per litre, diesel"
		
			
// Save data	
	save france_price_panel.dta , replace	
	erase TICPE_france_petrol.dta
	erase TICPE_france_diesel.dta		


***********************************	
** Create 5pm net price data set **
***********************************

// Prepare data set
	cd "$dta"
	use france_price_panel.dta , clear
	keep id_pdv cp_pdv lat lon state date e5_17oclock gazole_17oclock dow TICPE_petrol TICPE_diesel
	rename e5_17oclock gross_p_france
	rename gazole_17oclock gross_p_france_diesel

// Gen net prices: petrol (E5)
	gen net_p_france = gross_p_france/1.196-TICPE_petrol if year(date)<=2013 & state != "Corse"
	replace net_p_france = gross_p_france/1.2-TICPE_petrol if year(date)>2013 & state != "Corse"
	replace net_p_france = gross_p_france/1.13-TICPE_petrol if state == "Corse"
	assert net_p_france != . if gross_p_france != .
	drop TICPE_petrol
	label var net_p_france "Net price petrol 17 o'clock in Euro per litre"
	
// Gen net prices: diesel
	gen net_p_france_diesel = gross_p_france_diesel/1.196-TICPE_diesel if year(date)<=2013 & state != "Corse"
	replace net_p_france_diesel = gross_p_france_diesel/1.2-TICPE_diesel if year(date)>2013 & state != "Corse"
	replace net_p_france_diesel = gross_p_france_diesel/1.13-TICPE_diesel if state == "Corse"
	assert net_p_france_diesel != . if gross_p_france_diesel != .
	drop TICPE_diesel
	label var net_p_france_diesel "Net price diesel 17 o'clock in Euro per litre"	

// Keep only 2013 and 2014
	keep if year(date) == 2013 | year(date) == 2014
	
// Merge with Rotterdam prices
	merge m:1 date using "$dta\rotterdam_price.dta"

// Tab date if no Rotterdam price --> Missing on Christmas, New Year and Easter
	tab date if _merge == 1
	drop _merge
	
// Carry-forward Rotterdam prices
	bysort id_pdv (date): replace p_rotterdam = p_rotterdam[_n-1] if p_rotterdam == .
	drop if p_rotterdam == .													// This method not possible for 1 Jan 2013

// Generate margins
	replace p_rotterdam = p_rotterdam/100
	gen margin_france = net_p_france - p_rotterdam								// petrol (E5)
	gen margin_france_diesel = net_p_france_diesel - p_rotterdam				// diesel
	drop p_rotterdam
	label var margin_france "Gross margin France Euro per litre, petrol"
	label var margin_france_diesel "Gross margin France Euro per litre, diesel"
	
// Save
	cd "$dta"
	save france_margins_5pm.dta , replace
	erase france_price.dta 
	erase france_price_gazole.dta
	erase france_price_gazole_pepe.dta 
	erase france_price_panel.dta 
	erase france_price_sp95.dta 
	erase france_price_sp95_pepe.dta 
	erase postalcode_france.dta 














